package com.spider.core;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import com.spider.downloader.Scheduler;
import com.spider.util.CatchUtil;




/**
 * Main program entry point of the crawler.
 *
 * <p>Fetches the page at {@link #catch_url} and hands the downloaded text to a
 * {@link Scheduler} for further processing.
 *
 * @author liuhao
 */
public class Spider {
	
	/** URL of the first-level (entry) page the crawl starts from. */
	public String catch_url = "http://bj.ganji.com/";

	/** Creates a spider that starts from the default {@link #catch_url}. */
	public Spider(){
		
	}

	/**
	 * Command-line entry point: builds a {@code Spider} and fetches its start page.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		Spider spider = new Spider();
		spider.getCatch();
	}
	
	/**
	 * Starts the crawl with the first-level page.
	 *
	 * <p>Opens an HTTP connection to {@link #catch_url}, lets
	 * {@code CatchUtil.setRequestProperty} configure the request, reads the response
	 * body into a string via {@code CatchUtil.bufferRead}, and passes that string to
	 * {@code Scheduler.cleanHtml}.
	 *
	 * <p>I/O failures (malformed URL, connection or read errors) are not propagated to
	 * the caller; they are reported on standard error together with the failing URL.
	 */
	public void getCatch() {
		HttpURLConnection conn = null;
		try {
			URL url = new URL(catch_url);
			conn = (HttpURLConnection) url.openConnection();
			conn = CatchUtil.setRequestProperty(conn);

			String catch_str;
			// Close the response stream even if reading fails, so the underlying
			// socket is not leaked.
			try (InputStream in = conn.getInputStream()) {
				catch_str = CatchUtil.bufferRead(in);
			}

			Scheduler task = new Scheduler();
			task.cleanHtml(catch_str);
		} catch (IOException e) {
			// Keep the original best-effort behavior (no rethrow), but say which URL failed.
			System.err.println("Failed to fetch " + catch_url);
			e.printStackTrace();
		} finally {
			// Release the connection on every path, including early failures where
			// the input stream was never obtained.
			if (conn != null) {
				conn.disconnect();
			}
		}

	}
}
